from lxml import etree


# Scrape the entries listed under <div id="tFocus-btn"> of a local HTML file
# and print them as a list of {'src', 'title', 'num'} dictionaries.

parser = etree.HTMLParser(encoding='utf-8')

# Raw string: '\i' is not a valid escape sequence, so the non-raw literal
# triggers a DeprecationWarning/SyntaxWarning. The resulting path is the same.
html = etree.parse(r'E:\index.html', parser=parser)

# Every <li> element in the document. Not used below; the name is kept so
# anything relying on this module-level variable keeps working.
trs = html.xpath("//li")


def _first(node, expr):
    """Return the first match of XPath ``expr`` relative to ``node``.

    Returns ``None`` when the expression matches nothing, so one incomplete
    <li> does not abort the whole scrape with an IndexError.
    """
    matches = node.xpath(expr)
    return matches[0] if matches else None


# Only the <li> elements nested anywhere inside the div with id "tFocus-btn".
lis = html.xpath("//div[@id='tFocus-btn']//li")

# The XPath expressions are relative to each <li>: the src attribute of its
# direct <img> child, and the text of its second and first direct <span>
# children.
data = [
    {
        'src': _first(li, "img/@src"),
        'title': _first(li, "span[2]/text()"),
        'num': _first(li, "span[1]/text()"),
    }
    for li in lis
]
print(data)